*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*	This program appends and cleans the raw test score files for Colorado state
*	standardized exams (CSAP and CMAS), as well as ACT, PSAT and SAT scores. It
*	then reshapes the raw data, which are long on test, to be unique at the
*	student-year level.
*	----------------------------------------------------------------------------
*	IN: 	ARE_PARCC(20152016).txt
*			ARE_CSAP_ACT_AP.txt
*			ARE_CSAP_ACT_AP(2013).txt
*			ARE_CSAP_ACT_AP(2014).txt
*			ARE_ACT_AP(20152016).txt
*			ARE_PARCC(2017).txt
*			ARE_PSAT(2016).txt
*			ARE_PSAT_SAT(2017).txt
*			CMAS_2018_2019.txt
*			COSAT_2018_2019.txt
*
*	OUT: 	tests.dta (unique on student-year)
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

	// Raw score files to stack, listed without their .txt extension.
	// Built up two names at a time so each line stays short.
	local tests ARE_PARCC(20152016) ARE_CSAP_ACT_AP
	local tests `tests' ARE_CSAP_ACT_AP(2013) ARE_CSAP_ACT_AP(2014)
	local tests `tests' ARE_ACT_AP(20152016) ARE_PARCC(2017)
	local tests `tests' ARE_PSAT(2016) ARE_PSAT_SAT(2017)
	local tests `tests' CMAS_2018_2019 COSAT_2018_2019

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


***	build

	// Stack every raw score file listed in the local `tests' into one long
	// dataset, which is left in memory when the loop finishes.

	// Start from an empty dataset: without this, whatever happens to be in
	// memory would be saved into the accumulator and appended to the test
	// data (or an existing variable named hold would stop the script).
	clear

	// Seed the accumulator with one empty placeholder variable so there is
	// something to save before the first file has been read.
	gen hold = ""
	tempfile stacked
	save `stacked'

	foreach test of local tests {
		// every column is read as a string, so the files stack without
		// type conflicts
		import delimited using "${rawdata}tests/`test'.txt", clear stringcols(_all)
		rename maskedstu stu

		// add everything accumulated so far, then store the running total
		append using `stacked', force
		save `stacked', replace
	}

***	clean

	// "--" is blanked first (presumably a no-score placeholder) so that ss
	// can be converted to numeric along with the other identifiers
	replace ss = "" if ss == "--"
	foreach v of varlist ss grade stu year {
		destring `v', replace
	}

	// ap_score: numeric reading of proflvl (non-numeric values become
	// missing), kept only on rows whose test category is AP
	destring proflvl, generate(ap_score) force
	replace ap_score = . if testcategory != "AP"

**	sample selection

	// rows without a scale score are not usable
	keep if ss != .

*	only important tests
	// state exams, the ACT, and every naming variant of the PSAT/SAT
	keep if inlist(testname, "CMAS ELA", "CMAS Math") ///
			| inlist(testname, "CSAP Math", "CSAP Reading", "CSAP Writing") ///
			| inlist(testname, "Colorado ACT", "SAT", "PSAT") ///
			| inlist(testname, "Colorado SAT", "Colorado PSAT", "Colorado PSAT89")

	//harmonize PSAT/SAT names (plain SAT/PSAT in 2017, "Colorado ..." in 2018/19)
	replace testname = "PSAT" if testname == "Colorado PSAT" | testname == "Colorado PSAT89"
	replace testname = "SAT"  if testname == "Colorado SAT"

	//for PSAT and SAT keep only the Reading and Math measures
	drop if inlist(testname, "SAT", "PSAT") & !inlist(measurecode, "Reading", "Math")

*	duplicates
	// helper variables live in tempvars; none of them is needed downstream
	tempvar first_grade is_paper n_extra best

	//within a student-year keep only rows from the lowest grade tested
	//(the minimum is taken over all tests, not just math)
	destring grade, replace	// no change if grade is already numeric
	bys stu year: egen `first_grade' = min(grade)
	keep if grade == `first_grade'

	//drop paper-test rows when the student-year has more than one row
	//NOTE(review): the earlier "only important tests" filter keeps a fixed
	//list of test names that does not include "CSAPA Paper Math", so this
	//flag looks like it is always 0 and the drop never fires -- confirm
	gen `is_paper' = testname == "CSAPA Paper Math"
	duplicates tag stu year, gen(`n_extra')
	drop if `n_extra' & `is_paper' == 1

	//for remaining dups keep the highest score per student-year-measure
	//(rows tied at the maximum are all kept)
	//NOTE(review): the grouping key does not include testname, so rows from
	//different tests that share a measure value compete here -- confirm
	//this is intended
	bys stu year measure: egen `best' = max(ss)
	keep if ss == `best'

**	recoding tests

*	math

	//CSAP (through 2014) and CMAS (2015 onwards) are both Colorado's
	//standardized tests, so their math scores are pooled under one name.
	//Original author's note: CMAS is also tracked in HS, and HS CMAS math
	//ends up being dropped as a result (no statement in this section does so).
	replace testname = "math" if testname == "CMAS Math" | testname == "CSAP Math"

*	ela

	//CMAS reports a single ELA score, so the CSAP equivalent is built as the
	//mean of Reading and Writing and stored on the Reading row.
	//ss*(condition) is zero on every non-matching row, so each max() returns
	//the student-year's Reading (or Writing) score, or 0 when there is none.
	tempvar rd wr
	bys stu year: egen `rd' = max(ss*(testname=="CSAP Reading"))
	bys stu year: egen `wr' = max(ss*(testname=="CSAP Writing"))
	replace ss = (`rd' + `wr')/2 if testname == "CSAP Reading"

	//Writing rows have been folded into Reading and can go; a Reading row
	//whose Writing max is 0 had no Writing score to average with, so it
	//goes as well
	drop if testname == "CSAP Writing" | (testname == "CSAP Reading" & `wr' == 0)

	replace testname = "ela" if testname == "CMAS ELA" | testname == "CSAP Reading"

	//ACT: each measure gets its own test name (parallel lists, same order)
	local act_codes Composite English Math Reading Science
	local act_names act_comp act_english act_math act_reading act_science
	forvalues k = 1/5 {
		local code : word `k' of `act_codes'
		local name : word `k' of `act_names'
		replace testname = "`name'" if testname == "Colorado ACT" & measurecode == "`code'"
	}

	//PSAT 8/9 (9th grade outcomes 2018-19), PSAT 10 (10th grade 2016-19) and
	//SAT (11th grade 2016-19) are folded into the math / ela test names;
	//PSAT/SAT rows from any other grade keep their PSAT/SAT name
	replace testname = "math" if measurecode == "Math" ///
			& ((testname == "PSAT" & inlist(grade, 9, 10)) | (testname == "SAT" & grade == 11))
	replace testname = "ela" if measurecode == "Reading" ///
			& ((testname == "PSAT" & inlist(grade, 9, 10)) | (testname == "SAT" & grade == 11))

**	standardizing scores

	//z-score every scale score within its year x grade x test cell
	tempvar cell_mean cell_sd
	bys year grade testname: egen `cell_mean' = mean(ss)
	bys year grade testname: egen `cell_sd'   = sd(ss)

	gen score = (ss - `cell_mean') / `cell_sd'

	//the untouched scale score is carried forward under the name raw
	rename ss raw

	//rows whose z-score could not be computed (e.g. the cell's sd is zero
	//or missing) are removed
	drop if mi(score)

**	proficiency rates (advanced or proficient on CSAP; met or exceeded expectations on CMAS)
	// 1 when proflvl is A, P, 4 or 5 and 0 otherwise, defined on ela and math
	// rows only; every other test (e.g. the ACT scores) is left missing
	gen proficient = inlist(proflvl, "A", "P", "4", "5") if inlist(testname, "ela", "math")

**	reshape (to wide)

	// only the identifiers and the three per-test measures go into the reshape
	keep stu year grade testname raw score proficient

	// one row per student-grade-year, one column set per test name
	reshape wide score raw proficient, i(stu grade year) j(testname) string

	// final names: <test> for the z-score, <test>_raw for the scale score,
	// prof_<test> for the proficiency flag
	rename proficient* prof_*
	rename raw* *_raw
	rename score* *

	// proficiency is not defined for the ACT measures
	drop prof_act*

	// the tested grade is stored as a string under its final name
	rename grade test_grade
	tostring test_grade, replace

*	generate combined math and ELA scores

	// sum of the raw ELA and math scores (missing if either one is missing),
	// then z-scored within year x tested grade
	tempvar cell_mean cell_sd
	gen total_raw = ela_raw + math_raw
	bys year test_grade: egen `cell_mean' = mean(total_raw)
	bys year test_grade: egen `cell_sd'   = sd(total_raw)
	gen total = (total_raw - `cell_mean') / `cell_sd'

	// remove the helpers explicitly so they are gone before the file is saved
	drop `cell_mean' `cell_sd'

***	save

	// stop with an error unless stu and year together uniquely identify rows
	isid stu year

	// write the cleaned file, overwriting any existing copy
	save "${cleandata}tests.dta", replace
